# -*- coding: utf-8 -*-
"""
Created on Sat Dec 05 21:25:44 2015

@author: liangrongli
"""

def getCode(url):
    """Fetch ``url`` and return the raw response body.

    A User-Agent header is chosen at random from a small fixed pool of
    desktop browser strings and sent with the request.

    Args:
        url: Address to request.

    Returns:
        The response body exactly as returned by the response object's
        ``read()``; no decoding is performed here.

    Raises:
        Nothing is caught here: any error raised by ``urllib2.urlopen``
        (e.g. ``urllib2.URLError``) propagates to the caller.
    """
    import contextlib
    import random
    import urllib2

    user_agent = [
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.154 Safari/537.36 LBBROWSER',
        'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0) LBBROWSER',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20100101 Firefox/31.0',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36',
        'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)',
    ]
    headers = {'User-Agent': random.choice(user_agent)}
    req = urllib2.Request(url, None, headers=headers)
    # urllib2 response objects are not context managers on Python 2, so
    # closing() is used to release the connection even if read() raises.
    with contextlib.closing(urllib2.urlopen(req)) as resp:
        return resp.read()
    
def getList(url):
    """Return the link targets found on the page at ``url``.

    The page is downloaded with ``getCode`` and scanned for anchors wrapped
    in ``<p class="f14 mgt5">``; the captured ``href`` value of each match is
    collected.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of the captured ``href`` strings, or ``False`` when the page
        contains no match.
    """
    import re

    link_re = re.compile('<p class="f14 mgt5"><a href="(.*?)" .*?>.*?</a></p>', re.S)
    html = getCode(url)
    hrefs = link_re.findall(html)
    # Callers receive False, not an empty list, when nothing matched.
    return hrefs if hrefs else False

if __name__ == "__main__":
    # Script entry point: collect the links from the listing page, write
    # them one per line to haodou.txt, and report how long the run took.
    import time

    # time.time() measures elapsed wall-clock time. time.clock() reports CPU
    # time on Unix, which leaves out the time spent waiting on the network.
    start = time.time()

    url = "http://www.haodou.com/recipe/all"
    # getList() returns False (not an empty list) when nothing matches;
    # normalise that so the loop below never tries to iterate a bool.
    lists = getList(url) or []
    # The with-statement closes the file; no explicit close is needed.
    with open('haodou.txt', 'w') as f:
        for item in lists:
            f.write(item)
            f.write('\n')
    end = time.time()
    # Parenthesised single-argument form prints the same text on Python 2.
    print("Run Time is %f s" % (end - start))